sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.34 R6_2.5.1 fastmap_1.1.1 xfun_0.42
## [5] cachem_1.0.8 knitr_1.45 htmltools_0.5.7 rmarkdown_2.25
## [9] lifecycle_1.0.4 cli_3.6.2 sass_0.4.8 jquerylib_0.1.4
## [13] compiler_4.3.2 rstudioapi_0.15.0 tools_4.3.2 evaluate_0.23
## [17] bslib_0.6.1 yaml_2.3.8 rlang_1.1.3 jsonlite_1.8.8
First, load the necessary packages.
# load libraries - notes show the install command needed to install (pre installed)
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
##
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(tidyr)
library(grDevices)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
##
## rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:geneLenDataBase':
##
## unfactor
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
##
## desc
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: XML
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
##
## addNode
## The following object is masked from 'package:circlize':
##
## degree
## The following object is masked from 'package:plyr':
##
## join
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
## The following objects are masked from 'package:zoo':
##
## yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
##
## boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)
biomin <- read.csv("../../output/Biomin_blast_Pocillopora_acuta_best_hit.csv") %>% rename("Pocillopora_acuta_best_hit" = "gene_id" )
geneInfo <- read.csv("../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv")
biomin_mod <- merge(biomin, geneInfo, by=c("gene_id"), all=F)
head(biomin_mod)
## gene_id accessionnumber.geneID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 XP_022804785.1
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 P33_g8985
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 JR972076.1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Gene:g13552
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 aug_v2a.06327.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 PFX18785.1
## definition
## 1 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 2 Flagellar associated protein
## 3 Acidic skeletal organic matrix protein (Acidic SOMP)
## 4 Acidic SOMP (Full-Length p27)
## 5 SAARP3
## 6 Mucin-4 [Stylophora pistillata]
## Ref X seqnames start
## 1 Peled et al., 2020 224 Pocillopora_acuta_HIv2___Sc0000021 2329764
## 2 Drake et al., 2013 604 Pocillopora_acuta_HIv2___Sc0000013 2026351
## 3 Ramos-Silva et al., 2013 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 4 Mummadisetti et al., 2021 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 5 Takeuchi et al., 2016 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 6 Peled et al., 2020 1240 Pocillopora_acuta_HIv2___Sc0000005 2364120
## end width strand source type score.x phase
## 1 2349234 19471 + AUGUSTUS transcript NA NA
## 2 2032291 5941 - AUGUSTUS transcript NA NA
## 3 7864189 3795 + AUGUSTUS transcript NA NA
## 4 7864189 3795 + AUGUSTUS transcript NA NA
## 5 7864189 3795 + AUGUSTUS transcript NA NA
## 6 2382635 18516 + AUGUSTUS transcript NA NA
## ID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## transcript_id seed_ortholog evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 45351.EDO38228 1.51e-306
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 6087.XP_002163848.1 4.11e-211
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 <NA> NA
## score.y
## 1 851
## 2 607
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## eggNOG_OGs
## 1 COG0695@1|root,COG1249@1|root,KOG1752@2759|Eukaryota,KOG4716@2759|Eukaryota,38BHT@33154|Opisthokonta,3BBN5@33208|Metazoa
## 2 28N88@1|root,2QUTJ@2759|Eukaryota,39ZRG@33154|Opisthokonta,3BMP5@33208|Metazoa
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## max_annot_lvl COG_category Description
## 1 33208|Metazoa C thioredoxin-disulfide reductase activity
## 2 33208|Metazoa - -
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## Preferred_name
## 1 TXNRD1
## 2 -
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## GOs
## 1 GO:0000003;GO:0000302;GO:0000305;GO:0001650;GO:0001704;GO:0001707;GO:0001887;GO:0001890;GO:0003006;GO:0003674;GO:0003824;GO:0004791;GO:0005488;GO:0005515;GO:0005575;GO:0005622;GO:0005623;GO:0005634;GO:0005654;GO:0005730;GO:0005737;GO:0005739;GO:0005783;GO:0005829;GO:0006082;GO:0006139;GO:0006518;GO:0006520;GO:0006575;GO:0006725;GO:0006732;GO:0006733;GO:0006739;GO:0006749;GO:0006753;GO:0006790;GO:0006793;GO:0006796;GO:0006807;GO:0006950;GO:0006979;GO:0007154;GO:0007165;GO:0007275;GO:0007369;GO:0007498;GO:0008150;GO:0008152;GO:0008283;GO:0009056;GO:0009069;GO:0009117;GO:0009611;GO:0009628;GO:0009636;GO:0009653;GO:0009790;GO:0009888;GO:0009987;GO:0010035;GO:0010038;GO:0010269;GO:0010941;GO:0010942;GO:0012505;GO:0015036;GO:0015949;GO:0016043;GO:0016174;GO:0016209;GO:0016259;GO:0016491;GO:0016651;GO:0016667;GO:0016668;GO:0016999;GO:0017001;GO:0017144;GO:0018996;GO:0019216;GO:0019222;GO:0019362;GO:0019637;GO:0019725;GO:0019752;GO:0022404;GO:0022414;GO:0022607;GO:0023052;GO:0031974;GO:0031981;GO:0032501;GO:0032502;GO:0033554;GO:0033797;GO:0034599;GO:0034641;GO:0036295;GO:0036296;GO:0036477;GO:0042221;GO:0042303;GO:0042395;GO:0042493;GO:0042537;GO:0042592;GO:0042737;GO:0042743;GO:0042744;GO:0042802;GO:0042803;GO:0043025;GO:0043167;GO:0043169;GO:0043226;GO:0043227;GO:0043228;GO:0043229;GO:0043231;GO:0043232;GO:0043233;GO:0043436;GO:0043603;GO:0043933;GO:0044085;GO:0044237;GO:0044238;GO:0044248;GO:0044281;GO:0044297;GO:0044422;GO:0044424;GO:0044428;GO:0044444;GO:0044446;GO:0044452;GO:0044464;GO:0045340;GO:0045454;GO:0046483;GO:0046496;GO:0046688;GO:0046872;GO:0046914;GO:0046983;GO:0048332;GO:0048513;GO:0048518;GO:0048522;GO:0048598;GO:0048608;GO:0048646;GO:0048678;GO:0048729;GO:0048731;GO:0048856;GO:0050664;GO:0050789;GO:0050794;GO:0050896;GO:0051186;GO:0051187;GO:0051259;GO:0051262;GO:0051716;GO:0055086;GO:0055093;GO:0055114;GO:0061458;GO:0065003;GO:0065007;GO:0065008;GO:0070013;GO:0070276;GO:0070482;GO:0070887;GO:0070995;GO:0071241;GO:0071248;GO:0071280;GO:0071453;GO:0071455;GO:0071704;GO:0071840;GO:0072524;GO:0072593;GO:0080090;GO:0097237;GO:0097458;GO:0098623;GO:0098625;GO:0098626;GO:0098754;GO:0098869;GO:1901360;GO:1901564;GO:1901605;GO:1901700;GO:1990748
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## EC KEGG_ko KEGG_Pathway
## 1 1.8.1.9 ko:K22182 ko00450,ko05200,ko05225,map00450,map05200,map05225
## 2 - - -
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC
## 1 - R03596,R09372 RC02518,RC02873 ko00000,ko00001,ko01000 -
## 2 - - - - -
## 3 <NA> <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA>
## CAZy BiGG_Reaction PFAMs KEGG_new
## 1 - - Glutaredoxin,Pyr_redox_2,Pyr_redox_dim K22182
## 2 - - - <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> K22017
## substanceBXH geneSymbol
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 Pocillopora_acuta_HIv2___Sc0000021
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 Pocillopora_acuta_HIv2___Sc0000013
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 Pocillopora_acuta_HIv2___Sc0000005
## moduleColor
## 1 brown
## 2 turquoise
## 3 red
## 4 red
## 5 red
## 6 pink
## GO.terms
## 1 GO:0000003,GO:0000302,GO:0000305,GO:0001650,GO:0001704,GO:0001707,GO:0001887,GO:0001890,GO:0003006,GO:0003674,GO:0003824,GO:0004791,GO:0005488,GO:0005515,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005654,GO:0005730,GO:0005737,GO:0005739,GO:0005783,GO:0005829,GO:0006082,GO:0006139,GO:0006518,GO:0006520,GO:0006575,GO:0006725,GO:0006732,GO:0006733,GO:0006739,GO:0006749,GO:0006753,GO:0006790,GO:0006793,GO:0006796,GO:0006807,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007275,GO:0007369,GO:0007498,GO:0008150,GO:0008152,GO:0008283,GO:0009056,GO:0009069,GO:0009117,GO:0009611,GO:0009628,GO:0009636,GO:0009653,GO:0009790,GO:0009888,GO:0009987,GO:0010035,GO:0010038,GO:0010269,GO:0010941,GO:0010942,GO:0012505,GO:0015036,GO:0015949,GO:0016043,GO:0016174,GO:0016209,GO:0016259,GO:0016491,GO:0016651,GO:0016667,GO:0016668,GO:0016999,GO:0017001,GO:0017144,GO:0018996,GO:0019216,GO:0019222,GO:0019362,GO:0019637,GO:0019725,GO:0019752,GO:0022404,GO:0022414,GO:0022607,GO:0023052,GO:0031974,GO:0031981,GO:0032501,GO:0032502,GO:0033554,GO:0033797,GO:0034599,GO:0034641,GO:0036295,GO:0036296,GO:0036477,GO:0042221,GO:0042303,GO:0042395,GO:0042493,GO:0042537,GO:0042592,GO:0042737,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0043025,GO:0043167,GO:0043169,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043233,GO:0043436,GO:0043603,GO:0043933,GO:0044085,GO:0044237,GO:0044238,GO:0044248,GO:0044281,GO:0044297,GO:0044422,GO:0044424,GO:0044428,GO:0044444,GO:0044446,GO:0044452,GO:0044464,GO:0045340,GO:0045454,GO:0046483,GO:0046496,GO:0046688,GO:0046872,GO:0046914,GO:0046983,GO:0048332,GO:0048513,GO:0048518,GO:0048522,GO:0048598,GO:0048608,GO:0048646,GO:0048678,GO:0048729,GO:0048731,GO:0048856,GO:0050664,GO:0050789,GO:0050794,GO:0050896,GO:0051186,GO:0051187,GO:0051259,GO:0051262,GO:0051716,GO:0055086,GO:0055093,GO:0055114,GO:0061458,GO:0065003,GO:0065007,GO:0065008,GO:0070013,GO:0070276,GO:0070482,GO:0070887,GO:0070995,GO:0071241,GO:0071248,GO:0071280,GO:0071453,GO:0071455,GO:0071704,GO:0071840,GO:0072524,GO:0072593,GO:0080090,GO:0097237,GO:0097458,GO:0098623,GO:0098625,GO:0098626,GO:0098754,GO:0098869,GO:1901360,GO:1901564,GO:1901605,GO:1901700,GO:1990748
## 2 -
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## GO.description GS.Flat GS.Slope p.GS.Flat
## 1 thioredoxin-disulfide reductase activity 0.57178848 -0.57178848 3.311055e-05
## 2 - -0.29586493 0.29586493 4.589336e-02
## 3 <NA> 0.35628512 -0.35628512 1.508700e-02
## 4 <NA> 0.35628512 -0.35628512 1.508700e-02
## 5 <NA> 0.35628512 -0.35628512 1.508700e-02
## 6 <NA> -0.05455251 0.05455251 7.187880e-01
## p.GS.Slope A.brown p.A.brown A.magenta p.A.magenta A.red
## 1 3.311055e-05 0.7005073 5.973619e-08 -0.3738439 1.048844e-02 0.2901298
## 2 4.589336e-02 -0.4291375 2.921081e-03 0.3115539 3.505853e-02 -0.3452015
## 3 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 4 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 5 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 6 7.187880e-01 0.0972208 5.203783e-01 -0.3252127 2.743096e-02 0.3709019
## p.A.red A.turquoise p.A.turquoise A.purple p.A.purple A.green
## 1 5.047695e-02 -0.43323233 2.634293e-03 0.6984202 6.792759e-08 0.4574538
## 2 1.879503e-02 0.58815287 1.720729e-05 -0.1784560 2.353887e-01 -0.1306835
## 3 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 4 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 5 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 6 1.116224e-02 0.08164806 5.895928e-01 -0.1391597 3.563456e-01 0.1614616
## p.A.green A.lightcyan p.A.lightcyan A.pink p.A.pink A.blue
## 1 0.001391986 -0.3508191 1.682948e-02 0.1707384 2.565893e-01 0.12358439
## 2 0.386672688 0.1196505 4.283449e-01 -0.1522331 3.125037e-01 -0.58598406
## 3 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 4 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 5 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 6 0.283719167 -0.5276145 1.646022e-04 0.6417477 1.537006e-06 -0.02286640
## p.A.blue A.salmon p.A.salmon A.midnightblue p.A.midnightblue
## 1 4.132051e-01 0.1178467 0.435389343 0.2439890 0.10224333
## 2 1.880492e-05 0.1907995 0.204028320 0.2258109 0.13131383
## 3 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 4 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 5 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 6 8.801027e-01 0.2940377 0.047315397 0.2906592 0.05003895
## A.black p.A.black A.cyan p.A.cyan A.yellow p.A.yellow
## 1 -0.28430645 0.05550307 0.04904562 0.7461773 0.05522073 0.7154873547
## 2 -0.18825739 0.21023361 0.07386502 0.6256510 -0.14392338 0.3399558326
## 3 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 4 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 5 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 6 0.02103556 0.88964060 -0.13338389 0.3768501 -0.46998526 0.0009821983
## A.tan p.A.tan Length
## 1 0.2648346 0.075293267 19471
## 2 0.2613466 0.079363532 5941
## 3 -0.2055446 0.170565420 3795
## 4 -0.2055446 0.170565420 3795
## 5 -0.2055446 0.170565420 3795
## 6 -0.3805358 0.009084622 18516
length(unique(biomin_mod$gene_id))
## [1] 65
plyr::count(biomin_mod, "moduleColor")
## moduleColor freq
## 1 black 3
## 2 blue 36
## 3 brown 17
## 4 cyan 2
## 5 green 3
## 6 magenta 1
## 7 pink 7
## 8 red 18
## 9 salmon 6
## 10 tan 3
## 11 turquoise 21
## 12 yellow 10
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
ID.vector_biomin <- biomin_mod %>%
pull(gene_id) %>% unique()
##Get a list of GO Terms for each module
GO.terms_biomin <- biomin_mod %>%
dplyr::select(GOs,gene_id) %>% unique() %>% rename(GOs = "GO.terms")
dim(GO.terms_biomin)
## [1] 65 2
dim(GO.terms_biomin %>% filter(is.na(GO.terms)))
## [1] 43 2
dim(GO.terms_biomin %>% filter(!is.na(GO.terms)))
## [1] 22 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms_biomin$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms_biomin$gene_id, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene_id", "GO.terms")
GO.terms_biomin <- split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector = as.integer(ALL.vector %in% ID.vector_biomin)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector_biomin, gene2cat=GO.terms_biomin, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
head(GO)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 102 GO:0003674 0 1 20
## 108 GO:0003824 0 1 10
## 126 GO:0005488 0 1 14
## 130 GO:0005515 0 1 12
## 133 GO:0005575 0 1 21
## 136 GO:0005622 0 1 15
## numInCat term ontology
## 102 20 molecular_function MF
## 108 10 catalytic activity MF
## 126 14 binding MF
## 130 12 protein binding MF
## 133 21 cellular_component CC
## 136 15 intracellular anatomical structure CC
#adjust p-values
GO$bh_adjust <- p.adjust(GO$over_represented_pvalue, method="BH") #add adjusted p-values
#Filtering for p < 0.05
GO_05 <- GO %>%
dplyr::filter(bh_adjust<0.05) %>%
dplyr::arrange(., ontology, bh_adjust)
#Filtering for p < 0.00001
# GO_00001 <- GO %>%
# dplyr::filter(bh_adjust<0.00001) %>%
# dplyr::arrange(., ontology, bh_adjust)
#Write file of results
#write.csv(GO_00001, file = "../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
go_results <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
head(go_results)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0006807 0 1 11
## 2 2 GO:0006810 0 1 7
## 3 3 GO:0006811 0 1 7
## 4 4 GO:0006873 0 1 6
## 5 5 GO:0006996 0 1 6
## 6 6 GO:0007154 0 1 9
## numInCat term ontology bh_adjust
## 1 11 nitrogen compound metabolic process BP 0
## 2 7 transport BP 0
## 3 7 monoatomic ion transport BP 0
## 4 6 intracellular monoatomic ion homeostasis BP 0
## 5 6 organelle organization BP 0
## 6 9 cell communication BP 0
go_results_BP <- go_results %>%
filter(ontology=="BP") %>%
filter(bh_adjust != "NA") %>%
#filter(numInCat>10)%>%
arrange(., bh_adjust)
dim(go_results_BP)
## [1] 1083 9
head(go_results_BP)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0006807 0 1 11
## 2 2 GO:0006810 0 1 7
## 3 3 GO:0006811 0 1 7
## 4 4 GO:0006873 0 1 6
## 5 5 GO:0006996 0 1 6
## 6 6 GO:0007154 0 1 9
## numInCat term ontology bh_adjust
## 1 11 nitrogen compound metabolic process BP 0
## 2 7 transport BP 0
## 3 7 monoatomic ion transport BP 0
## 4 6 intracellular monoatomic ion homeostasis BP 0
## 5 6 organelle organization BP 0
## 6 9 cell communication BP 0
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/biomin_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
dev.off()
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
##
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
scores <- setNames(-log(go_results_BP$bh_adjust), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 768 10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_reduced$ParentTerm))
## [1] 91
The reduced list of terms is 180 terms that falls under 32 parent terms.
#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
Can further reduce like so:
#add vector for terms of interest to reduce number of GO terms - NOT using this to look at individual modules for exploratory purposes
keywords<-c("metabolism", "carbon","bicarbonate", "apoptosis", "death", "symbiosis", "regulation of cell communication", "trans membrane transport", "transmembrane", "organic substance transport", "inorganic substance transport","response to stress", "antioxidant", "calcification","biomineralization", "heat","transporters","proton transport","ion transport","acid-base", "homeostasis")
Search for these keywords in the parent terms
go_results_BP_reduced <- go_results_BP_reduced %>%
filter(grepl(paste(keywords, collapse="|"), ParentTerm))
This is only 17 terms
#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered_keywords.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_biomin <- ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) +
geom_point(aes(size=bh_adjust)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
GO.plot_biomin
GO_00001_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
GO_00001_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")
GO_00001_biomin <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_00001_up %>%
filter(ontology=="BP")%>%
filter(bh_adjust != "NA") %>%
filter(numInCat>10)%>%
arrange(., bh_adjust)
dim(go_results_BP_up)
## [1] 1976 8
go_results_BP_down <- GO_00001_down %>%
filter(ontology=="BP")%>%
filter(bh_adjust != "NA") %>%
filter(numInCat>10)%>%
arrange(., bh_adjust)
dim(go_results_BP_down)
## [1] 1822 8
go_results_BP_biomin <- GO_00001_biomin %>%
filter(ontology=="BP")%>%
filter(bh_adjust != "NA") %>%
#filter(numInCat>10)%>%
arrange(., bh_adjust)
dim(go_results_BP_biomin)
## [1] 1083 8
all_enriched_terms_up_down_biomin <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm,go_results_BP_biomin$GOterm)
length(all_enriched_terms_up_down_biomin)
## [1] 4881
length(unique(all_enriched_terms_up_down_biomin))
## [1] 2465
By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules or the biomineralization genes.
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down_biomin),
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
reducedTerms <- reduceSimMatrix(simMatrix,
scores = "size",
threshold=0.7,
orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 1941 10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_up_reduced$GOterm))
## [1] 1682
length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 134
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_down_reduced$GOterm))
## [1] 1589
length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 137
#keep only the goterms from the reduced list
go_results_BP_biomin_reduced <- go_results_BP_biomin %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_biomin_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_biomin_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_biomin_reduced$GOterm))
## [1] 768
length(unique(go_results_BP_biomin_reduced$ParentTerm))
## [1] 107
#add vector for terms of interest to reduce number of GO terms - NOT using this to look at individual modules for exploratory purposes
keywords<-c("metabolism", "carbon","bicarbonate", "apoptosis", "death", "symbiosis", "regulation of cell communication", "trans membrane transport", "transmembrane", "organic substance transport", "inorganic substance transport","response to stress", "antioxidant", "calcification","biomineralization", "heat","transporters","proton transport","ion transport","acid-base", "homeostasis")
Search for these keywords in the parent terms
go_results_BP_up_reduced_keywords <- go_results_BP_up_reduced %>%
filter(grepl(paste(keywords, collapse="|"), ParentTerm))
length(unique(go_results_BP_up_reduced_keywords$GOterm))
## [1] 84
length(unique(go_results_BP_up_reduced_keywords$ParentTerm))
## [1] 6
go_results_BP_down_reduced_keywords <- go_results_BP_down_reduced %>%
filter(grepl(paste(keywords, collapse="|"), ParentTerm))
length(unique(go_results_BP_down_reduced_keywords$GOterm))
## [1] 72
length(unique(go_results_BP_down_reduced_keywords$ParentTerm))
## [1] 6
go_results_BP_biomin_reduced_keywords <- go_results_BP_biomin_reduced %>%
filter(grepl(paste(keywords, collapse="|"), ParentTerm))
length(unique(go_results_BP_biomin_reduced_keywords$GOterm))
## [1] 61
length(unique(go_results_BP_biomin_reduced_keywords$ParentTerm))
## [1] 6
#write.csv(go_results_BP_up_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#write.csv(go_results_BP_biomin_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
Won’t move forward with these for now, but they are there if we need further reduced lists.
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced
cal_up_terms <- cal_up_terms %>%
mutate(Factor = "Up")
cal_up_terms_parent_nterm <- cal_up_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
mutate(Calcification.direction = "Up") %>% ungroup()
head(cal_up_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807 0 1 1215
## 2 GO:0007275 0 1 978
## 3 GO:0008152 0 1 1387
## 4 GO:0009987 0 1 1938
## 5 GO:0016043 0 1 1049
## 6 GO:0019222 0 1 982
## numInCat term ontology bh_adjust
## 1 1215 nitrogen compound metabolic process BP 0
## 2 978 multicellular organism development BP 0
## 3 1387 metabolic process BP 0
## 4 1938 cellular process BP 0
## 5 1049 cellular component organization BP 0
## 6 982 regulation of metabolic process BP 0
## ParentTerm Factor
## 1 metabolic process Up
## 2 cell differentiation Up
## 3 metabolic process Up
## 4 cellular process Up
## 5 mitochondrion organization Up
## 6 regulation of DNA-templated transcription Up
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
cal_down_terms_parent_nterm <- cal_down_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
mutate(Calcification.direction = "Down") %>% ungroup()
head(cal_down_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807 0 1 910
## 2 GO:0007275 0 1 610
## 3 GO:0008152 0 1 998
## 4 GO:0009987 0 1 1277
## 5 GO:0016043 0 1 652
## 6 GO:0019222 0 1 599
## numInCat term ontology bh_adjust
## 1 910 nitrogen compound metabolic process BP 0
## 2 610 multicellular organism development BP 0
## 3 998 metabolic process BP 0
## 4 1277 cellular process BP 0
## 5 652 cellular component organization BP 0
## 6 599 regulation of metabolic process BP 0
## ParentTerm Factor
## 1 metabolic process Down
## 2 cell differentiation Down
## 3 metabolic process Down
## 4 cellular process Down
## 5 mitochondrion organization Down
## 6 regulation of DNA-templated transcription Down
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the list of biomineralization-related genes. The list has been further reduced by using the package rrvgo.
#cal_biomin_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
cal_biomin_terms <- go_results_BP_biomin_reduced
cal_biomin_terms <- cal_biomin_terms %>% mutate(Factor = "Biomin")
head(cal_biomin_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0006807 0 1 11
## 2 GO:0006810 0 1 7
## 3 GO:0006811 0 1 7
## 4 GO:0006873 0 1 6
## 5 GO:0006996 0 1 6
## 6 GO:0007154 0 1 9
## numInCat term ontology bh_adjust
## 1 11 nitrogen compound metabolic process BP 0
## 2 7 transport BP 0
## 3 7 monoatomic ion transport BP 0
## 4 6 intracellular monoatomic ion homeostasis BP 0
## 5 6 organelle organization BP 0
## 6 9 cell communication BP 0
## ParentTerm Factor
## 1 metabolic process Biomin
## 2 transport Biomin
## 3 monoatomic ion transport Biomin
## 4 intracellular calcium ion homeostasis Biomin
## 5 mitochondrion organization Biomin
## 6 intracellular receptor signaling pathway Biomin
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.0001) within the modules that were upregulated or downregulated for calcification or in the biomineralization gene list. The list has been further reduced by using the package rrvgo.
all_terms <- rbind(cal_down_terms,cal_up_terms,cal_biomin_terms)
all_terms <- all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","bh_adjust","ParentTerm")]
all_terms$GOterm<-as.factor(all_terms$GOterm)
dim(all_terms) #3453 reduced terms
## [1] 4039 10
length(unique(all_terms$GOterm)) #this represents 1817 unique terms between the three lists of reduced terms
## [1] 1941
length(unique(all_terms$ParentTerm)) #this represents 136 unique terms between the three lists of reduced terms
## [1] 143
This is collapsing the list from 3273 rows to only 1809, representing the 1809 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.
goterms_shared <- all_terms %>%
group_by(GOterm) %>%
dplyr::summarise(
ParentTerm = paste(unique(ParentTerm), collapse = ", "),
Factor = paste(unique(Factor), collapse = ", "),
term = paste(unique(term), collapse = ", ")
)
dim(goterms_shared)
## [1] 1941 4
goterms_shared_full <- goterms_shared %>%
left_join(dplyr::select(all_terms,GOterm, Factor, bh_adjust), by = "GOterm") %>% #select 3 columns GOterm, Factor, bh_adjust from all_terms and left join by GOterm, turns this dataframe from 1817 rows back to the 3453 in all_terms
mutate(bh_adjust_Up = ifelse(Factor.y == "Up", bh_adjust, NA)) %>% #add a column that is the p-value for the Up factor
mutate(bh_adjust_Down = ifelse(Factor.y == "Down", bh_adjust, NA)) %>% #add a column that is the p-value for the Down factor
mutate(bh_adjust_Biomin = ifelse(Factor.y == "Biomin", bh_adjust, NA)) %>% #add a column that is the p-value for the Biomin factor
dplyr::select(-c("bh_adjust", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
dplyr::summarize(bh_adjust_Up = na.omit(bh_adjust_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
bh_adjust_Down = na.omit(bh_adjust_Down)[1], #carry over the p-value for the term in the down direction, by taking the first non-NA value.
bh_adjust_Biomin = na.omit(bh_adjust_Biomin)[1]) %>% #carry over the p-value for the term in the biomin list, by taking the first non-NA value.
rename(Factor.x = "Factor") #rename column
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm_with_Biomin.csv")
goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()
goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Biomin")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")
goterms_shared_only_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up, Biomin" | Factor=="Up, Biomin" | Factor=="Down, Biomin")
nrow(goterms_up)
## [1] 199
nrow(goterms_down)
## [1] 118
nrow(goterms_biomin)
## [1] 132
nrow(goterms_shared_only)
## [1] 856
nrow(goterms_shared_only_biomin)
## [1] 636
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_biomin) + nrow(goterms_shared_only) + nrow(goterms_shared_only_biomin) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 70
length(unique(goterms_down$ParentTerm))
## [1] 54
length(unique(goterms_biomin$ParentTerm))
## [1] 44
length(unique(goterms_shared_only$ParentTerm))
## [1] 114
length(unique(goterms_shared_only_biomin$ParentTerm))
## [1] 101
For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)
For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)
#parent_filtered_up <- result_parent_unique %>%
# dplyr::filter(Factor == "Up" | Factor=="Up, Biomin")
# #dplyr::filter(SharedGOterms>=5)
parent_filtered_up <- result_parent_unique %>%
dplyr::filter(Factor=="Up, Biomin")
#dplyr::filter(SharedGOterms>=5)
#parent_filtered_down <- result_parent_unique %>%
# dplyr::filter(Factor=="Down" | Factor=="Down, Biomin")
# #dplyr::filter(SharedGOterms>=5)
parent_filtered_down <- result_parent_unique %>%
dplyr::filter(Factor=="Down, Biomin")
#dplyr::filter(SharedGOterms>=5)
merged_up <- parent_filtered_up %>%
dplyr::group_by(ParentTerm) %>%
left_join(cal_up_terms_parent_nterm, by = "ParentTerm") %>%
rename(SharedGOterms = "UniqueGOTerms") %>%
mutate(
Has_Biomin = any(Factor == "Up, Biomin"),
Sum_of_UniqueGOterms = sum(UniqueGOTerms, na.rm = TRUE),
Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
Factor %in% c("Up, Biomin"),
UniqueGOTerms / Sum_of_UniqueGOterms,
0
)
) %>%
mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
filter((Has_Biomin == TRUE & Factor == "Up, Biomin") | (Has_Biomin == FALSE & Factor == "Up"))
merged_up_clean <- na.omit(merged_up) #remove rows for the non-"Up, Biomin" rows that don't have a percentage
merged_up_clean
## # A tibble: 14 × 9
## # Groups: ParentTerm [14]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 cellular oxidant… Up, B… 2 12 Up
## 2 cilium assembly Up, B… 2 27 Up
## 3 fatty acid biosy… Up, B… 2 33 Up
## 4 hydrogen peroxid… Up, B… 2 3 Up
## 5 morphogenesis of… Up, B… 2 19 Up
## 6 nervous system d… Up, B… 2 31 Up
## 7 synapse organiza… Up, B… 2 15 Up
## 8 actin cytoskelet… Up, B… 1 12 Up
## 9 embryo developme… Up, B… 1 8 Up
## 10 intracellular ca… Up, B… 1 24 Up
## 11 nuclear migration Up, B… 1 16 Up
## 12 regulation of ch… Up, B… 1 12 Up
## 13 regulation of me… Up, B… 1 26 Up
## 14 sensory percepti… Up, B… 1 18 Up
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
cal_freq_terms_filtered_up <- merged_up_clean %>%
#filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 14 × 9
## # Groups: ParentTerm [14]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 cellular oxidant… Up, B… 2 12 Up
## 2 cilium assembly Up, B… 2 27 Up
## 3 fatty acid biosy… Up, B… 2 33 Up
## 4 hydrogen peroxid… Up, B… 2 3 Up
## 5 morphogenesis of… Up, B… 2 19 Up
## 6 nervous system d… Up, B… 2 31 Up
## 7 synapse organiza… Up, B… 2 15 Up
## 8 actin cytoskelet… Up, B… 1 12 Up
## 9 embryo developme… Up, B… 1 8 Up
## 10 intracellular ca… Up, B… 1 24 Up
## 11 nuclear migration Up, B… 1 16 Up
## 12 regulation of ch… Up, B… 1 12 Up
## 13 regulation of me… Up, B… 1 26 Up
## 14 sensory percepti… Up, B… 1 18 Up
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
#scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12)) #making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_unique_biomin.pdf", plot=freq_fig_up, dpi=300, height=5, width=10, units="in", limitsize=FALSE)
cal_freq_terms_filtered_down <- merged_down_clean %>%
#filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 8 × 9
## # Groups: ParentTerm [8]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 protein ubiquitin… Down,… 2 17 Down
## 2 apoptotic process Down,… 1 25 Down
## 3 detection of stim… Down,… 1 4 Down
## 4 mitochondrion org… Down,… 1 28 Down
## 5 muscle organ deve… Down,… 1 14 Down
## 6 regulation of lip… Down,… 1 5 Down
## 7 sensory perceptio… Down,… 1 19 Down
## 8 transmembrane tra… Down,… 1 7 Down
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
#scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_unique_biomin.pdf", plot=freq_fig_down, dpi=300, height=5, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.
cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- cal_up_terms %>%
mutate(Factor = "Up")
cal_up_terms_parent_nterm <- cal_up_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
mutate(Calcification.direction = "Up") %>% ungroup()
head(cal_up_terms)
## X.1 X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 1 GO:0006807 0 1 1215
## 2 2 2 GO:0007275 0 1 978
## 3 3 4 GO:0008152 0 1 1387
## 4 4 5 GO:0009987 0 1 1938
## 5 5 6 GO:0016043 0 1 1049
## 6 6 7 GO:0019222 0 1 982
## numInCat term ontology bh_adjust
## 1 1215 nitrogen compound metabolic process BP 0
## 2 978 multicellular organism development BP 0
## 3 1387 metabolic process BP 0
## 4 1938 cellular process BP 0
## 5 1049 cellular component organization BP 0
## 6 982 regulation of metabolic process BP 0
## ParentTerm Factor
## 1 metabolic process Up
## 2 developmental process Up
## 3 metabolic process Up
## 4 cellular process Up
## 5 cellular component organization or biogenesis Up
## 6 regulation of metabolic process Up
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.
cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
cal_down_terms_parent_nterm <- cal_down_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
mutate(Calcification.direction = "Down") %>% ungroup()
head(cal_down_terms)
## X.1 X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 1 GO:0006807 0 1 910
## 2 2 2 GO:0007275 0 1 610
## 3 3 4 GO:0008152 0 1 998
## 4 4 5 GO:0009987 0 1 1277
## 5 5 6 GO:0016043 0 1 652
## 6 6 7 GO:0019222 0 1 599
## numInCat term ontology bh_adjust
## 1 910 nitrogen compound metabolic process BP 0
## 2 610 multicellular organism development BP 0
## 3 998 metabolic process BP 0
## 4 1277 cellular process BP 0
## 5 652 cellular component organization BP 0
## 6 599 regulation of metabolic process BP 0
## ParentTerm Factor
## 1 metabolic process Down
## 2 developmental process Down
## 3 metabolic process Down
## 4 cellular process Down
## 5 cellular component organization or biogenesis Down
## 6 regulation of metabolic process Down
cal_freq_terms<- merge(cal_up_terms_parent_nterm,cal_down_terms_parent_nterm, by=c("ParentTerm","Number.of.terms","Calcification.direction"),all=T)
cal_freq_terms
## ParentTerm
## 1 actin filament-based process
## 2 actin filament-based process
## 3 aging
## 4 aging
## 5 amide metabolic process
## 6 amide metabolic process
## 7 ammonium ion metabolic process
## 8 ammonium ion metabolic process
## 9 anatomical structure morphogenesis
## 10 anatomical structure morphogenesis
## 11 animal organ development
## 12 animal organ development
## 13 behavior
## 14 behavior
## 15 biological process involved in interspecies interaction between organisms
## 16 biological process involved in symbiotic interaction
## 17 biosynthetic process
## 18 biosynthetic process
## 19 carbohydrate derivative metabolic process
## 20 carbohydrate derivative metabolic process
## 21 carbohydrate metabolic process
## 22 carbohydrate metabolic process
## 23 catabolic process
## 24 catabolic process
## 25 cell activation
## 26 cell activation
## 27 cell communication
## 28 cell communication
## 29 cell cycle
## 30 cell cycle
## 31 cell division
## 32 cell division
## 33 cell fate commitment
## 34 cell fate commitment
## 35 cell motility
## 36 cell motility
## 37 cell projection organization
## 38 cell projection organization
## 39 cell recognition
## 40 cell recognition
## 41 cell surface receptor signaling pathway
## 42 cell-cell signaling
## 43 cellular aldehyde metabolic process
## 44 cellular component biogenesis
## 45 cellular component biogenesis
## 46 cellular component disassembly
## 47 cellular component organization or biogenesis
## 48 cellular component organization or biogenesis
## 49 cellular localization
## 50 cellular localization
## 51 cellular macromolecule metabolic process
## 52 cellular macromolecule metabolic process
## 53 cellular modified amino acid metabolic process
## 54 cellular modified amino acid metabolic process
## 55 cellular process
## 56 cellular process
## 57 cellular response to environmental stimulus
## 58 cellular response to environmental stimulus
## 59 chromatin organization
## 60 chromatin organization
## 61 coagulation
## 62 coagulation
## 63 cytoplasm organization
## 64 cytoplasm organization
## 65 defense response
## 66 detection of stimulus
## 67 developmental maturation
## 68 developmental maturation
## 69 developmental process
## 70 developmental process
## 71 DNA metabolic process
## 72 embryo development
## 73 endomembrane system organization
## 74 establishment or maintenance of cell polarity
## 75 establishment or maintenance of cell polarity
## 76 extracellular structure organization
## 77 generation of precursor metabolites and energy
## 78 generation of precursor metabolites and energy
## 79 glycosylation
## 80 glycosylation
## 81 head development
## 82 head development
## 83 hemopoiesis
## 84 hemopoiesis
## 85 homeostatic process
## 86 homeostatic process
## 87 immune system process
## 88 immune system process
## 89 import into cell
## 90 import into cell
## 91 intracellular signal transduction
## 92 lipid metabolic process
## 93 lipid metabolic process
## 94 localization
## 95 localization
## 96 locomotion
## 97 locomotion
## 98 macromolecule localization
## 99 macromolecule localization
## 100 macromolecule modification
## 101 macromolecule modification
## 102 maintenance of cell number
## 103 maintenance of cell number
## 104 maintenance of location
## 105 maintenance of location
## 106 membrane organization
## 107 membrane organization
## 108 metabolic process
## 109 metabolic process
## 110 methylation
## 111 methylation
## 112 microtubule bundle formation
## 113 microtubule-based process
## 114 microtubule-based process
## 115 microtubule-based transport
## 116 microtubule-based transport
## 117 mitochondrial gene expression
## 118 mitochondrial gene expression
## 119 molting cycle
## 120 molting cycle
## 121 monoatomic ion transport
## 122 monoatomic ion transport
## 123 multi-multicellular organism process
## 124 multi-multicellular organism process
## 125 multicellular organismal process
## 126 multicellular organismal process
## 127 muscle structure development
## 128 muscle structure development
## 129 negative regulation of biological process
## 130 negative regulation of biological process
## 131 neurotransmitter transport
## 132 neurotransmitter transport
## 133 nitrogen compound transport
## 134 nitrogen compound transport
## 135 organelle localization
## 136 organelle localization
## 137 organelle localization by membrane tethering
## 138 organelle localization by membrane tethering
## 139 organic cyclic compound metabolic process
## 140 organic cyclic compound metabolic process
## 141 organic hydroxy compound metabolic process
## 142 organic hydroxy compound metabolic process
## 143 organic substance transport
## 144 organic substance transport
## 145 organophosphate metabolic process
## 146 pattern specification process
## 147 pattern specification process
## 148 peptidyl-amino acid modification
## 149 peptidyl-amino acid modification
## 150 phosphorus metabolic process
## 151 phosphorus metabolic process
## 152 pigment metabolic process
## 153 pigment metabolic process
## 154 pigmentation
## 155 pigmentation
## 156 positive regulation of biological process
## 157 positive regulation of biological process
## 158 post-embryonic development
## 159 post-embryonic development
## 160 protein folding
## 161 protein folding
## 162 protein maturation
## 163 protein-containing complex localization
## 164 protein-containing complex localization
## 165 protein-containing complex organization
## 166 protein-containing complex organization
## 167 proteolysis
## 168 proteolysis
## 169 pyridine-containing compound metabolic process
## 170 pyridine-containing compound metabolic process
## 171 receptor metabolic process
## 172 receptor metabolic process
## 173 regulation of autophagy
## 174 regulation of autophagy
## 175 regulation of biological quality
## 176 regulation of biological quality
## 177 regulation of cell adhesion
## 178 regulation of cell adhesion
## 179 regulation of cell communication
## 180 regulation of cell communication
## 181 regulation of cell death
## 182 regulation of cell death
## 183 regulation of cell population proliferation
## 184 regulation of cell population proliferation
## 185 regulation of cellular component organization
## 186 regulation of cellular component organization
## 187 regulation of cellular process
## 188 regulation of cellular process
## 189 regulation of cytokine production
## 190 regulation of developmental process
## 191 regulation of developmental process
## 192 regulation of growth
## 193 regulation of growth
## 194 regulation of localization
## 195 regulation of localization
## 196 regulation of metabolic process
## 197 regulation of metabolic process
## 198 regulation of molecular function
## 199 regulation of molecular function
## 200 regulation of multicellular organismal process
## 201 regulation of multicellular organismal process
## 202 regulation of plasma lipoprotein particle levels
## 203 regulation of reactive oxygen species metabolic process
## 204 regulation of reactive oxygen species metabolic process
## 205 regulation of response to stimulus
## 206 regulation of response to stimulus
## 207 reproduction
## 208 reproduction
## 209 response to abiotic stimulus
## 210 response to abiotic stimulus
## 211 response to biotic stimulus
## 212 response to biotic stimulus
## 213 response to chemical
## 214 response to chemical
## 215 response to endogenous stimulus
## 216 response to endogenous stimulus
## 217 response to external stimulus
## 218 response to external stimulus
## 219 response to organic substance
## 220 response to stimulus
## 221 response to stimulus
## 222 response to stress
## 223 response to stress
## 224 response to xenobiotic stimulus
## 225 response to xenobiotic stimulus
## 226 rhythmic process
## 227 rhythmic process
## 228 RNA processing
## 229 RNA processing
## 230 secondary metabolic process
## 231 secondary metabolic process
## 232 secretion
## 233 secretion
## 234 signaling
## 235 signaling
## 236 sleep
## 237 small molecule metabolic process
## 238 small molecule metabolic process
## 239 sulfur compound metabolic process
## 240 sulfur compound metabolic process
## 241 synapse organization
## 242 synapse organization
## 243 system development
## 244 system development
## 245 system process
## 246 system process
## 247 tissue development
## 248 tissue development
## 249 tissue migration
## 250 tissue migration
## 251 tube development
## 252 tube development
## 253 vesicle organization
## 254 vesicle-mediated transport
## 255 vesicle-mediated transport
## 256 viral process
## 257 viral process
## Number.of.terms Calcification.direction
## 1 10 Down
## 2 12 Up
## 3 2 Down
## 4 2 Up
## 5 12 Down
## 6 12 Up
## 7 1 Down
## 8 1 Up
## 9 23 Down
## 10 28 Up
## 11 16 Down
## 12 16 Up
## 13 8 Down
## 14 8 Up
## 15 6 Down
## 16 5 Up
## 17 20 Up
## 18 24 Down
## 19 17 Down
## 20 22 Up
## 21 10 Up
## 22 12 Down
## 23 36 Up
## 24 39 Down
## 25 13 Down
## 26 13 Up
## 27 2 Down
## 28 16 Up
## 29 41 Down
## 30 50 Up
## 31 7 Down
## 32 10 Up
## 33 8 Down
## 34 8 Up
## 35 8 Down
## 36 9 Up
## 37 20 Down
## 38 27 Up
## 39 2 Down
## 40 2 Up
## 41 22 Up
## 42 19 Down
## 43 1 Down
## 44 18 Up
## 45 24 Down
## 46 5 Up
## 47 26 Down
## 48 31 Up
## 49 24 Down
## 50 31 Up
## 51 8 Up
## 52 21 Down
## 53 1 Up
## 54 2 Down
## 55 1 Down
## 56 1 Up
## 57 5 Down
## 58 6 Up
## 59 6 Down
## 60 6 Up
## 61 2 Down
## 62 3 Up
## 63 1 Down
## 64 1 Up
## 65 12 Up
## 66 4 Up
## 67 6 Down
## 68 9 Up
## 69 10 Up
## 70 12 Down
## 71 16 Up
## 72 8 Up
## 73 4 Down
## 74 5 Down
## 75 6 Up
## 76 3 Up
## 77 8 Up
## 78 11 Down
## 79 4 Down
## 80 4 Up
## 81 2 Down
## 82 2 Up
## 83 4 Down
## 84 6 Up
## 85 22 Down
## 86 24 Up
## 87 23 Down
## 88 25 Up
## 89 1 Down
## 90 1 Up
## 91 32 Up
## 92 16 Down
## 93 28 Up
## 94 10 Up
## 95 11 Down
## 96 7 Down
## 97 12 Up
## 98 26 Up
## 99 28 Down
## 100 14 Up
## 101 17 Down
## 102 3 Down
## 103 3 Up
## 104 4 Up
## 105 5 Down
## 106 6 Down
## 107 10 Up
## 108 15 Down
## 109 15 Up
## 110 2 Down
## 111 2 Up
## 112 4 Down
## 113 11 Down
## 114 17 Up
## 115 3 Down
## 116 3 Up
## 117 2 Down
## 118 2 Up
## 119 2 Down
## 120 2 Up
## 121 8 Down
## 122 10 Up
## 123 1 Down
## 124 1 Up
## 125 2 Down
## 126 2 Up
## 127 11 Up
## 128 14 Down
## 129 17 Up
## 130 18 Down
## 131 2 Up
## 132 3 Down
## 133 10 Up
## 134 12 Down
## 135 9 Down
## 136 16 Up
## 137 2 Down
## 138 2 Up
## 139 5 Down
## 140 5 Up
## 141 4 Down
## 142 4 Up
## 143 5 Down
## 144 8 Up
## 145 13 Down
## 146 17 Down
## 147 17 Up
## 148 20 Down
## 149 22 Up
## 150 16 Down
## 151 27 Up
## 152 1 Down
## 153 2 Up
## 154 2 Down
## 155 2 Up
## 156 16 Down
## 157 16 Up
## 158 5 Up
## 159 6 Down
## 160 2 Down
## 161 3 Up
## 162 3 Down
## 163 1 Down
## 164 1 Up
## 165 11 Down
## 166 12 Up
## 167 5 Down
## 168 7 Up
## 169 3 Up
## 170 5 Down
## 171 1 Down
## 172 1 Up
## 173 6 Up
## 174 7 Down
## 175 22 Down
## 176 26 Up
## 177 9 Down
## 178 11 Up
## 179 10 Up
## 180 34 Down
## 181 25 Down
## 182 25 Up
## 183 10 Down
## 184 12 Up
## 185 20 Down
## 186 22 Up
## 187 3 Down
## 188 3 Up
## 189 4 Up
## 190 11 Up
## 191 27 Down
## 192 22 Down
## 193 24 Up
## 194 40 Down
## 195 40 Up
## 196 49 Down
## 197 50 Up
## 198 45 Up
## 199 49 Down
## 200 9 Down
## 201 12 Up
## 202 1 Down
## 203 1 Down
## 204 3 Up
## 205 12 Up
## 206 14 Down
## 207 37 Down
## 208 37 Up
## 209 14 Down
## 210 16 Up
## 211 9 Up
## 212 10 Down
## 213 15 Up
## 214 65 Down
## 215 13 Down
## 216 16 Up
## 217 9 Up
## 218 13 Down
## 219 56 Up
## 220 2 Up
## 221 6 Down
## 222 15 Up
## 223 20 Down
## 224 3 Up
## 225 4 Down
## 226 3 Down
## 227 3 Up
## 228 28 Down
## 229 28 Up
## 230 1 Down
## 231 1 Up
## 232 19 Down
## 233 19 Up
## 234 14 Up
## 235 30 Down
## 236 1 Down
## 237 43 Up
## 238 56 Down
## 239 2 Down
## 240 2 Up
## 241 9 Down
## 242 15 Up
## 243 27 Down
## 244 31 Up
## 245 18 Down
## 246 18 Up
## 247 15 Down
## 248 19 Up
## 249 5 Down
## 250 5 Up
## 251 6 Down
## 252 6 Up
## 253 4 Up
## 254 14 Up
## 255 15 Down
## 256 1 Down
## 257 2 Up
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P_adj < 0.00001) within the list of biomineralization-related genes. The list has been further reduced by using the package rrvgo.
cal_biomin_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
cal_biomin_terms <- cal_biomin_terms %>% mutate(Factor = "Biomin")
head(cal_biomin_terms)
## X.1 X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 1 GO:0006807 0 1 11
## 2 2 2 GO:0006810 0 1 7
## 3 3 3 GO:0006811 0 1 7
## 4 4 4 GO:0006873 0 1 6
## 5 5 5 GO:0006996 0 1 6
## 6 6 6 GO:0007154 0 1 9
## numInCat term ontology bh_adjust
## 1 11 nitrogen compound metabolic process BP 0
## 2 7 transport BP 0
## 3 7 monoatomic ion transport BP 0
## 4 6 intracellular monoatomic ion homeostasis BP 0
## 5 6 organelle organization BP 0
## 6 9 cell communication BP 0
## ParentTerm Factor
## 1 primary metabolic process Biomin
## 2 localization Biomin
## 3 monoatomic ion transport Biomin
## 4 homeostatic process Biomin
## 5 cellular component organization or biogenesis Biomin
## 6 cell communication Biomin
#cal_biomin_terms <-read.csv("/Users/imkri/Desktop/Penn Postdoc/Heron experiment/Biomineralizationgo terms.csv")
#cal_biomin_terms
# cal_up_terms <-read.csv("/Users/imkri/Desktop/Penn Postdoc/Heron experiment/goseq_pattern_calcification_filtered.csv")
# cal_up_terms<- cal_up_terms %>%
# mutate(Factor = "Up")
# cal_up_terms
# cal_down_terms <-read.csv("/Users/imkri/Desktop/Penn Postdoc/Heron experiment/goseq_pattern_calcification_down_filtered.csv")
# cal_down_terms<-cal_down_terms %>%
# mutate(Factor = "Down")
# cal_down_terms
colnames(cal_biomin_terms)
## [1] "X.1" "X"
## [3] "GOterm" "over_represented_pvalue"
## [5] "under_represented_pvalue" "numDEInCat"
## [7] "numInCat" "term"
## [9] "ontology" "bh_adjust"
## [11] "ParentTerm" "Factor"
all_terms<- merge(cal_up_terms,cal_down_terms, by=c("Factor","GOterm","X.1","X","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","bh_adjust","ParentTerm"),all=T)
all_terms<- merge(all_terms,cal_biomin_terms, by=c("Factor","GOterm","X.1","X","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","bh_adjust","ParentTerm"),all=T)
all_terms$GOterm<-as.factor(all_terms$GOterm)
head(all_terms)
## Factor GOterm X.1 X over_represented_pvalue under_represented_pvalue
## 1 Biomin GO:0000003 87 88 1.201233e-10 1
## 2 Biomin GO:0000041 455 624 1.015731e-02 1
## 3 Biomin GO:0000122 388 462 6.463204e-03 1
## 4 Biomin GO:0000132 726 1029 1.746098e-02 1
## 5 Biomin GO:0000226 178 184 1.981187e-06 1
## 6 Biomin GO:0000278 248 275 1.014913e-04 1
## numDEInCat numInCat term
## 1 5 5 reproduction
## 2 1 1 transition metal ion transport
## 3 1 1 negative regulation of transcription by RNA polymerase II
## 4 1 1 establishment of mitotic spindle orientation
## 5 3 3 microtubule cytoskeleton organization
## 6 2 2 mitotic cell cycle
## ontology bh_adjust ParentTerm
## 1 BP 1.286789e-09 reproduction
## 2 BP 1.269710e-02 monoatomic ion transport
## 3 BP 1.269710e-02 negative regulation of biological process
## 4 BP 1.746098e-02 establishment or maintenance of cell polarity
## 5 BP 9.966176e-06 microtubule-based process
## 6 BP 3.523580e-04 mitotic cell cycle
goterms_shared <- all_terms %>%
group_by(GOterm) %>%
dplyr::summarise(
ParentTerm = paste(unique(ParentTerm), collapse = ", "),
Factor = paste(unique(Factor), collapse = ", ")
)
head(goterms_shared)
## # A tibble: 6 × 3
## GOterm ParentTerm Factor
## <fct> <chr> <chr>
## 1 GO:0000003 reproduction Biomin, Down, Up
## 2 GO:0000028 cellular component biogenesis Down
## 3 GO:0000041 monoatomic ion transport Biomin, Down, Up
## 4 GO:0000070 cellular component organization or biogenesis Down, Up
## 5 GO:0000075 cell cycle Down, Up
## 6 GO:0000077 cell cycle, intracellular signal transduction Down, Up
#write.csv(goterms_shared, "Merged GOterms by factor and ParentTerm.csv")
result_unique <- goterms_shared %>%
group_by(ParentTerm,Factor) %>%
dplyr::summarise(SharedGOterms = n_distinct(GOterm))%>%
arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
result_unique
## # A tibble: 445 × 3
## # Groups: ParentTerm [243]
## ParentTerm Factor SharedGOterms
## <chr> <chr> <int>
## 1 cell cycle Down, Up 31
## 2 small molecule metabolic process Down, Up 29
## 3 RNA processing Down, Up 27
## 4 regulation of localization Down, Up 25
## 5 response to chemical, response to organic substance Down, Up 25
## 6 regulation of molecular function Down, Up 24
## 7 regulation of metabolic process Biomin, Do… 22
## 8 catabolic process Down, Up 20
## 9 regulation of metabolic process Down, Up 20
## 10 anatomical structure morphogenesis Biomin, Do… 19
## # ℹ 435 more rows
result_filtered_up<- result_unique %>%
dplyr::filter(Factor=="Biomin, Down, Up" | Factor=="Biomin, Up")
#dplyr::filter(SharedGOterms>=5)
result_filtered_up
## # A tibble: 138 × 3
## # Groups: ParentTerm [125]
## ParentTerm Factor SharedGOterms
## <chr> <chr> <int>
## 1 regulation of metabolic process Biomi… 22
## 2 anatomical structure morphogenesis Biomi… 19
## 3 circulatory system development, system development Biomi… 19
## 4 regulation of molecular function Biomi… 17
## 5 negative regulation of biological process Biomi… 16
## 6 reproduction Biomi… 16
## 7 cellular catabolic process, catabolic process Biomi… 15
## 8 response to cytokine, response to chemical, response to… Biomi… 15
## 9 tissue development Biomi… 14
## 10 cell projection organization Biomi… 13
## # ℹ 128 more rows
result_filtered_down<- result_unique %>%
dplyr::filter(Factor=="Biomin, Down, Up" | Factor=="Biomin, Down")
#dplyr::filter(SharedGOterms>=5)
result_filtered_down
## # A tibble: 132 × 3
## # Groups: ParentTerm [125]
## ParentTerm Factor SharedGOterms
## <chr> <chr> <int>
## 1 regulation of metabolic process Biomi… 22
## 2 anatomical structure morphogenesis Biomi… 19
## 3 circulatory system development, system development Biomi… 19
## 4 regulation of molecular function Biomi… 17
## 5 negative regulation of biological process Biomi… 16
## 6 reproduction Biomi… 16
## 7 cellular catabolic process, catabolic process Biomi… 15
## 8 response to cytokine, response to chemical, response to… Biomi… 15
## 9 tissue development Biomi… 14
## 10 cell projection organization Biomi… 13
## # ℹ 122 more rows
cal_freq_terms_filtered_up <- merged_up_clean %>%
filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Up")
head(cal_freq_terms_filtered_up)
## # A tibble: 6 × 6
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 actin filament-ba… 5 12 Up
## 2 amide metabolic p… 2 12 Up
## 3 anatomical struct… 23 28 Up
## 4 animal organ deve… 12 16 Up
## 5 biosynthetic proc… 1 20 Up
## 6 carbohydrate deri… 4 22 Up
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 2 more variables:
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,70))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_KB.pdf", plot=freq_fig_up, dpi=300, height=20, width=10, units="in", limitsize=FALSE)
cal_freq_terms_filtered_down <- merged_down_clean %>%
filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 55 × 6
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 actin filament-b… 4 10 Down
## 2 amide metabolic … 2 12 Down
## 3 anatomical struc… 23 23 Down
## 4 animal organ dev… 12 16 Down
## 5 biosynthetic pro… 1 24 Down
## 6 carbohydrate der… 4 17 Down
## 7 catabolic process 15 39 Down
## 8 cell cycle 9 41 Down
## 9 cell projection … 13 20 Down
## 10 cell-cell signal… 8 19 Down
## # ℹ 45 more rows
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 2 more variables:
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
#geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,70))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
#scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_KB.pdf", plot=freq_fig_down, dpi=300, height=20, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs